
max.likelihood = function(y, z, x.delta, x.op.y, alpha.start, zeta.start, weights, 
    max.step, thres, pa, pb, delta.d.est) {
    
    startpars = c(alpha.start, zeta.start)
    
    getProbScalar = getProbScalarRDiff
    
    ## negative log likelihood function
    neg.log.likelihood = function(pars) {
        alpha = pars[1:pa]
        zeta = pars[(pa + 1):(pa + pb)]
        p0p1 = t(mapply(getProbScalar, x.delta %*% alpha, x.op.y %*% zeta, delta.d.est))
        p0 = p0p1[, 1];   p1 = p0p1[, 2]
        
        return(-sum((1 - y[z == 0]) * log(1 - p0[z == 0]) * weights[z == 0] + 
            (y[z == 0]) * log(p0[z == 0]) * weights[z == 0]) - sum((1 - y[z == 
            1]) * log(1 - p1[z == 1]) * weights[z == 1] + (y[z == 1]) * log(p1[z == 
            1]) * weights[z == 1]))
    }
    
    neg.log.likelihood.alpha = function(alpha){
        p0p1  = t(mapply(getProbScalar,x.delta %*% alpha, x.op.y %*% zeta,
                         delta.d.est))
        p0    = p0p1[,1];  p1 = p0p1[,2]
        
        return(-sum((1-y[z==0])*log(1-p0[z==0])*weights[z==0] +
                        (y[z==0])*log(p0[z==0])*weights[z==0]) -
                   sum((1-y[z==1])*log(1-p1[z==1])*weights[z==1] +
                           (y[z==1])*log(p1[z==1])*weights[z==1]))  
    }
    
    neg.log.likelihood.zeta = function(zeta){
        p0p1  = t(mapply(getProbScalar,x.delta%*%alpha, x.op.y%*%zeta, delta.d.est))
        p0    = p0p1[,1];  p1 = p0p1[,2]
        
        return(-sum((1-y[z==0])*log(1-p0[z==0])*weights[z==0] +
                        (y[z==0])*log(p0[z==0])*weights[z==0]) -
                   sum((1-y[z==1])*log(1-p1[z==1])*weights[z==1] +
                           (y[z==1])*log(p1[z==1])*weights[z==1]))  
    }
    
    
    ## Optimization 

    Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres)
    alpha = alpha.start; zeta = zeta.start
    diff = thres + 1; step = 0
    while(diff > thres & step < max.step){
        step = step + 1
        opt1 = optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max.step))
        diff1 = Diff(opt1$par,alpha)
        alpha = opt1$par
        opt2 = optim(zeta,neg.log.likelihood.zeta,control=list(maxit=max.step))
        diff  = max(diff1,Diff(opt2$par,zeta))
        zeta = opt2$par
    }
    
    opt = list(par = c(alpha,zeta), convergence = (step < max.step), 
               x.deltalue = neg.log.likelihood(c(alpha,zeta)))
    
    return(opt)
}

